/*
Learning from Law Enforcement
Libor Dusek and Christian Traxler

Do-file producing regression results and figures in the paper and (online) appendix

*************************************
*** DESCRIPTION AND PRELIMINARIES
**************************************

*** This do file: RD_05.do
    Heaping/Bunching Analysis at first (enforcement) and second (high-fine) cutoff
    => Sections I -- IV:  Cutoff 1
    => Sections V -- VI:  Cutoff 2

*** Required packages:
    cmogram
    gtools (provides the gegen command used below)
    DCdensityThree.ado and DCdensityFour.ado (available via project's repository)

*** The input dataset:
    Dusek_Traxler_RD_a4.dta
	
*** Output is stored in subdirectories of the output directory (global "outputdir", set below): RD_TAB, RD_FIG, and RD_temp
	Names of output files correspond to the Table/Figure numbers in the paper and (online) appendix
	
*** List of Figures produced:
    => Figures A.2, A.3, A.4
    See also Section C of the ReadMe File.
*/

* Session setup: clear memory, define directories, open the log, set the
* global parameters A and F, and load the input dataset.
cap clear
set more off

/* Set directories.
   NOTE: edit "dir" so that it points at the project root before running.
   Every path used by a command below is wrapped in double quotes, so a
   project root that contains spaces does not break cd, mkdir, log or use. */
global dir C:/yourdir/
cd    "$dir"
global datadir $dir/datasets/
global outputdir outfiles/
/* capture: mkdir returns an error when the directory already exists;
   that error is deliberately ignored here */
capture mkdir "${outputdir}"
capture mkdir "${outputdir}/RD_TAB"
capture mkdir "${outputdir}/RD_FIG"
capture mkdir "${outputdir}/RD_temp"

/* Close any log left open by an earlier run, then start a fresh one.
   The "text" option is given explicitly: with a .txt suffix Stata otherwise
   falls back to the session's default log type (SMCL unless changed with
   -set logtype-), which would put SMCL markup into the .txt file. */
cap log close
log using    "$dir/${outputdir}/log_RD_05.txt", replace text

/* Set Global Parameters */
global A = 4
*Length of Assignment Period, in months (A)
global F = 4
*Length of Follow-Up Period, in months (F)

/* Load Data with an Assignment Period of A months */
use "$datadir/Dusek_Traxler_RD_a${A}.dta"


/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* I.    DEFINE SAMPLE & VARIABLES  (*partially* identical to RD_01), Cut 1
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

/* For each of the three variable families, keep the version that belongs to
   the F-month follow-up horizon, strip its numeric suffix, and drop the
   remaining suffixed variants. */
foreach stub in period_follup dum_haspost period_pre {
    rename `stub'_${F} `stub'
    drop `stub'_*
}
/* From here on the follow-up period is referred to as the "post" period */
rename period_follup period_post

/* Sample restrictions, applied jointly:
   (i)  the observation belongs to the pre period, to the post [=outcome]
        period, or is flagged by dum_trigger;
   (ii) the trigger date lies before 1 July 2017, i.e. before the enforcement
        cutoff was adjusted (early July 2017). */
keep if (period_pre==1 | period_post==1 | dum_trigger==1) & day_trigger<td(1jul2017)

/* Assignment speed (S_i in Section 4.1 of the paper, "speed_Z" here),
   normalized by the cutoff under study: the enforcement cutoff at 14km/h
   above the speed limit. */
generate speed_Z = maxspeed - 14

/* First relevant "episode" per id, given the sample restrictions above.
   bysort also leaves the data sorted by id, which the by-prefixed commands
   further down rely on. */
bysort id: gegen epi_min = min(episode)

/* Note 1: unlike RD_01.do, the sample is NOT (yet) narrowed to a window
           around Cutoff 1 (there: keep if speed_Z>-14 & speed_Z<9).
   Note 2: the sample is NOT (yet) limited to the first episode
           (there: keep if episode==epi_min). */

/* Number of rides in the PRE(!) treatment period: count rows per id that are
   in the pre period or flagged by dum_trigger, then copy that count to every
   row of the id by taking the within-id mean. */
generate temp = 1
by id: gegen xxnrides_pre = sum(temp) if (period_pre==1 | dum_trigger==1)
by id: gegen nrides_pre   = mean(xxnrides_pre)
drop temp xxnrides_*

/* Driving frequency in the pre-treatment (!) period: rides per day, where the
   period length runs from day_first to day_endwait inclusive. */
generate length_pre        = 1 + day_endwait - day_first
generate driving_frequency = nrides_pre/length_pre

/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* II.   Distribution of Assignment Speed (speed_Z), Full Sample, Cut 1
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

/* Estimation-sample flag: one row per car, namely the trigger-day row of its
   first episode, provided a post period exists (dum_haspost==1).
   Note: this sample is slightly larger than the main sample, mainly because
   the assignment speed is not restricted here. */
generate ESTSAMPLE = (day == day_trigger & dum_haspost==1 & episode==epi_min)

/* Figure A.2, Subfigure (a).
   The density routine is run on a temporary copy that holds only the
   estimation sample; restore brings the full data back afterwards. */
preserve
keep if ESTSAMPLE==1
/* remove leftovers of an earlier call so that generate() can recreate them */
capture drop Xj Yj r0 fhat se_fhat
DCdensityThree speed_Z if speed_Z<22 & speed_Z>-37 & ESTSAMPLE==1, ///
    breakpoint(0) generate(Xj Yj r0 fhat se_fhat) ///
    graphname(${outputdir}/RD_FIG/Fig_A2_a_Distrib.pdf)
restore

/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* III.   Distribution of Assignment Speed (speed_Z), Constrained Sample, Cut 1
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

/* Narrow the sample to a window around the enforcement cutoff */
keep if speed_Z<10 & speed_Z>-15

/* Recompute the first episode per id, now among rows whose assignment speed
   lies inside the window */
capture drop epi_min
egen epi_min = min(episode), by(id)

/* Note: frequent vs non-frequent (used in IV. below) is defined from the
   (pre-treatment) period of the 1st relevant episode. */

/* Rebuild the estimation-sample flag with the updated epi_min */
replace ESTSAMPLE = (day == day_trigger & episode==epi_min & dum_haspost==1)

/* Median split of driving_frequency within the estimation sample.
   The median is parked in a temporary scalar so that it is kept at full
   precision and does not depend on r() staying intact.
   frequent = 1 at or above the median, 0 below it, and missing outside the
   estimation sample or where driving_frequency equals the system missing
   value. */
quietly summarize driving_frequency if ESTSAMPLE==1, detail
tempname med
scalar `med' = r(p50)
generate frequent = (driving_frequency >= `med') if ESTSAMPLE==1 & driving_frequency != .

/* Figure A.2, Subfigure (c): counts of cars by speed_Z within +/-5 of the
   cutoff. "counter" is a constant helper variable passed to cmogram together
   with the count option. */
generate counter = 1
cmogram counter speed_Z if speed_Z<=5 & speed_Z>=-5 & ESTSAMPLE==1, ///
    histopts(bin(50)) cutpoint(0) count
graph export ${outputdir}/RD_FIG/Fig_A2_c_Hist-Cut1.pdf, replace
drop counter

/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* IV.   Distribution of speed_Z in Subsamples, over time, Cut 1
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

/* New sample flag: trigger-day rows with a post period, WITHOUT limiting the
   sample to the first relevant episode */
replace ESTSAMPLE = (day==day_trigger & dum_haspost==1)

/* Both figure loops split the data into six consecutive six-month windows of
   the variable "month": window t covers 657+6*(t-1) <= month < 657+6*t.
   (Presumably month is a Stata monthly date, which would put the first
   window at 2014m10 -- TODO confirm.) */

/* Figure A.3, six subfigures: all cars */
preserve
forvalues t = 1/6 {
    local m_lo = 657 + 6*(`t' - 1)
    local m_hi = 657 + 6*`t'
    capture drop Xj Yj r0 fhat se_fhat
    DCdensityFour speed_Z if speed_Z<10 & speed_Z>-15 & month>=`m_lo' & month<`m_hi' & ESTSAMPLE==1, ///
        breakpoint(0) generate(Xj Yj r0 fhat se_fhat) ///
        graphname(${outputdir}/RD_FIG/Fig_A3_all_T`t'.pdf)
}
/* discard the variables created above but keep a snapshot for the next loop */
restore, preserve

/* Figure A.4, six subfigures: cars with plate_A==1 or plate_S==1 that are
   classified as frequent.
   NOTE(review): frequent was assigned in Section III only on first-episode
   trigger rows, so the frequent==1 condition also limits this figure to
   first-episode rows, although ESTSAMPLE here covers all episodes. Confirm
   that this is intended. */
forvalues t = 1/6 {
    local m_lo = 657 + 6*(`t' - 1)
    local m_hi = 657 + 6*`t'
    capture drop Xj Yj r0 fhat se_fhat
    DCdensityFour speed_Z if speed_Z<10 & speed_Z>-15 & month>=`m_lo' & month<`m_hi' & ESTSAMPLE==1 & (plate_A==1 | plate_S==1) & frequent==1, ///
        breakpoint(0) generate(Xj Yj r0 fhat se_fhat) ///
        graphname(${outputdir}/RD_FIG/Fig_A4_locals_frequent_T`t'.pdf)
}
restore
clear

/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* V.    SAMPLE & VARIABLES  for Cutoff 2 (similar to RD_02)
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

/* Start again from the raw input data */
cap clear
use $datadir/Dusek_Traxler_RD_a${A}.dta

/* Keep the F-month version of each variable family, strip its numeric
   suffix, and drop the remaining suffixed variants */
foreach stub in period_follup dum_haspost period_pre {
    rename `stub'_${F} `stub'
    drop `stub'_*
}
rename period_follup period_post

/* Keep rows in the pre period, in the post period, or flagged by dum_trigger.
   NOTE(review): unlike Section I, no restriction on day_trigger is applied
   here -- confirm that this matches RD_02. */
keep if (period_pre==1 | period_post==1 | dum_trigger==1)

/* Assignment speed (S_i in Section 4.1 of the paper, "speed_Z" here),
   normalized by the high-fine cutoff (23km/h above the speed limit) */
generate speed_Z = maxspeed - 23

/* Window around the high-fine cutoff */
keep if speed_Z<20 & speed_Z>-15

/* First relevant "episode" per id (after the sample restrictions) */
bysort id: gegen epi_min = min(episode)


/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
* VI.   Distribution of Assignment Speed (speed_Z), Constrained Sample, Cut 2
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

/* Estimation-sample flag: cross-section of cars, taking the trigger-day row
   of the first relevant episode when a post period exists */
generate ESTSAMPLE = (day == day_trigger & dum_haspost==1 & episode==epi_min)

/* Figure A.2, Subfigure (b).
   preserve/restore discards the variables that generate() adds to the data. */
preserve
capture drop Xj Yj r0 fhat se_fhat
DCdensityFour speed_Z if speed_Z<10 & speed_Z>-15 &  ESTSAMPLE==1, ///
    breakpoint(0) generate(Xj Yj r0 fhat se_fhat) ///
    graphname(${outputdir}/RD_FIG/Fig_A2_b_Distrib.pdf)
restore

/* Figure A.2, Subfigure (d): counts of cars by speed_Z within +/-5 of the
   cutoff. "counter" is a constant helper variable passed to cmogram together
   with the count option. */
generate counter = 1
cmogram counter speed_Z if speed_Z<=5 & speed_Z>=-5 & ESTSAMPLE==1, ///
    histopts(bin(50)) cutpoint(0) count
graph export ${outputdir}/RD_FIG/Fig_A2_d_Hist-Cut2.pdf, replace

log close
